I did this a bit flippantly before, but I want to fomalize the process by which we estimate the uncertainty on emulator predictions.
In [63]:
    
from pearce.emulator import SpicyBuffalo, LemonPepperWet
from pearce.mocks import cat_dict
import numpy as np
from os import path
    
In [64]:
    
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
    
In [65]:
    
#xi gg
training_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_lowmsat/PearceRedMagicXiCosmoFixedNd.hdf5'
#test_file= '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/'
test_file =  '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/PearceRedMagicXiCosmoFixedNd_Test.hdf5'
    
In [66]:
    
em_method = 'gp'
split_method = 'random'
    
In [67]:
    
a = 1.0
z = 1.0/a - 1.0
    
In [68]:
    
fixed_params = {'z':z}#, 'cosmo': 0}#, 'r':24.06822623}
    
In [69]:
    
from glob import glob
    
In [70]:
    
hp = []
for fname in sorted(glob('/home/users/swmclau2/Git/pearce/bin/optimization/sloppy_joes_indiv_bins/sloppy*.npy')):
    #print fname, len(hp)
    hp.append(np.loadtxt(fname))
    
In [71]:
    
len(hp)
    
    Out[71]:
In [72]:
    
param_names = ['ombh2', 'omch2', 'w0', 'ns', 'ln10As', 'H0', 'Neff', 'logM0', 'sigma_logM', 'logM1', 'alpha']
    
In [73]:
    
pnames = ['bias', 'amp']
pnames.extend(param_names)
pnames.append('amp')
pnames.extend(param_names)
    
In [74]:
    
from collections import defaultdict
metric = []
for _hp in hp:
    metric.append(defaultdict(list))
    for val, pname in zip(_hp, pnames):
        metric[-1][pname].append(val)
    
In [75]:
    
np.random.seed(0)
emu = LemonPepperWet(training_file, method = em_method, fixed_params=fixed_params,
                 custom_mean_function = None, downsample_factor = 0.1)#, hyperparams = {'metric':metric})
    
In [76]:
    
emu.get_param_names()
    
    Out[76]:
In [77]:
    
pred_y, data_y = emu.goodness_of_fit(test_file, statistic = None)
    
In [78]:
    
print len(pred_y[0])
    
    
In [79]:
    
test_x, _, test_err, _ = emu.get_data(test_file, emu.fixed_params)
t, old_idxs  = emu._whiten(test_x)
    
In [80]:
    
train_x, train_y, train_err, info = emu.get_data(training_file, emu.fixed_params)
    
    
In [ ]:
    
    
In [81]:
    
len(train_err)
    
    Out[81]:
In [82]:
    
mean_func_at_params = emu.mean_function(t)
    
In [83]:
    
mean_func_at_params.shape
    
    Out[83]:
In [84]:
    
plt_idx = 4249
print t[0][plt_idx]
print pred_y[0][plt_idx]
print data_y[0][plt_idx]
#print mean_func_at_params[0][plt_idx]
    
    
In [85]:
    
resmat = np.zeros(( 7, 5, 1000, 18))
resmat_log = np.zeros_like(resmat)
predmat = np.zeros(( 7, 5, 1000, 18))
datamat = np.zeros(( 7, 5, 1000, 18))
for bin_no in xrange(len(pred_y)):
    py, dy = pred_y[bin_no], data_y[bin_no]
    for cosmo_no in xrange(7):
        cpy, cdy = py[cosmo_no*5000:(cosmo_no+1)*5000], dy[cosmo_no*5000:(cosmo_no+1)*5000]
        #for realization_no in xrange(5):
        for hod_no in xrange(1000):
            crpy, crdy = cpy[hod_no::1000], cdy[hod_no::1000]
            #for hod_no in xrange(1000):
            #print crpy.std()
            predmat[cosmo_no, :,  hod_no , bin_no] = crpy
            datamat[cosmo_no, :,  hod_no , bin_no] = crdy
            resmat[cosmo_no, :,  hod_no , bin_no] = 10**crpy - 10**crdy
            resmat_log[cosmo_no, :,  hod_no , bin_no] = crpy - crdy
    
In [86]:
    
resmat.shape
    
    Out[86]:
In [87]:
    
N = 5
for cosmo_no in xrange(7):
    for hod_no in xrange(1000):
        r = resmat[cosmo_no, :, hod_no].reshape((-1, 18), order = 'C').T
        print r.shape
        scovmat = r.dot(r.T)/r.shape[1]
        #fig = plt.figure(figsize = (10, 6))
        #plt.subplot(121)
        im = plt.imshow(np.log10(np.abs(scovmat) ))
        plt.colorbar(im)
        #plt.subplot(122)
        #pred = 10**predmat[cosmo_no, :, hod_no].reshape((-1, 18), order = 'C').T
        #print pred.shape
        #data = datamat[cosmo_no, :, hod_no].reshape((-1, 18), order = 'C').T
        #scovmat2 = np.cov(pred)
        #im = plt.imshow(np.log10(np.abs(scovmat2) ), cmap = 'viridis', vmin = -5,vmax = 9)
        #plt.colorbar(im)
        
        plt.show()
        N-=1
        
        if N == 0:
            break
    if N==0:
        break
    
    
    
    
    
    
    
    
    
    
    
In [88]:
    
N = 50
for cosmo_no in xrange(7):
    for hod_no in xrange(1000):
        
        pred = 10**predmat[cosmo_no, :, hod_no].reshape((-1, 18), order = 'C')
        data = 10**datamat[cosmo_no, :, hod_no].reshape((-1, 18), order = 'C')
        for p,d in zip(pred,data):
            plt.plot(emu.scale_bin_centers, np.abs(p-d)/d)
        #for d in data:
        #    plt.plot(emu.scale_bin_centers, d)
        
        plt.xscale('log')
        plt.yscale('log')
        plt.show()
        N-=1
        
        if N == 0:
            break
    if N==0:
        break
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
In [89]:
    
resmat_flat = resmat.reshape((-1, 18)).T
datamat_flat = datamat.reshape((-1, 18)).T
#resmat_hodrealav = np.mean(resmat_realav, axis = 1)
    
In [90]:
    
r_idx = 10
t_bin = t[r_idx]
acc_bin = np.abs(resmat_flat[r_idx])/datamat_flat[r_idx]
    
In [91]:
    
t_bin.shape, acc_bin.shape
    
    Out[91]:
In [92]:
    
percentiles = np.percentile(acc_bin, range(101))
norm_acc_bin = np.digitize(acc_bin, percentiles)
#norm_acc_bin = 100*((acc_bin - acc_bin.min())/acc_bin.max()).astype(int)
    
In [93]:
    
palette = sns.diverging_palette(220, 20, n=len(percentiles)-1, as_cmap=True)
#sns.set_palette(palette)
    
In [94]:
    
pnames = emu.get_param_names()
    
In [95]:
    
for axes1 in xrange(7,11):
    for axes2 in xrange(axes1+1, 11):
        cbar = plt.scatter(t_bin[:,axes1 ], t_bin[:,axes2], c = norm_acc_bin,cmap = palette, alpha = 0.2)
        plt.colorbar(cbar)
        plt.xlabel(pnames[axes1])
        plt.ylabel(pnames[axes2])
        #plt.gray()
        plt.show()
    
    
    
    
    
    
    
In [96]:
    
test_err_bin = test_err[:, r_idx, r_idx]
    
In [97]:
    
plt.hist(np.log10(test_err_bin) )
    
    Out[97]:
    
In [98]:
    
plt.hist(np.log10(np.sqrt(emu.yerr[r_idx])) )
    
    Out[98]:
    
In [99]:
    
len(emu.yerr)
    
    Out[99]:
In [100]:
    
percentiles = np.percentile(np.log10(test_err_bin), [0, 10,20,30,40,50,60,70,80,90,100])
norm_err_bin = np.digitize(np.log10(test_err_bin), percentiles)
    
In [101]:
    
#relevant stat is uncertainty in training error, not test error
for axes1 in xrange(7,11):
    for axes2 in xrange(axes1+1, 11):
        cbar = plt.scatter(t_bin[:,axes1 ], t_bin[:,axes2], c = norm_err_bin,cmap = palette, alpha = 0.2)
        plt.colorbar(cbar)
        plt.xlabel(pnames[axes1])
        plt.ylabel(pnames[axes2])
        #plt.gray()
        plt.show()
    
    
    
    
    
    
    
In [102]:
    
test_err_diag= np.array([test_err[:, r_idx, r_idx] for r_idx in xrange(emu.n_bins)] )
    
In [103]:
    
test_err_diag.mean(axis = 1)
    
    Out[103]:
In [112]:
    
np.sqrt(np.mean(np.square(np.abs(resmat_flat)/(10**datamat_flat) )))
    
    Out[112]:
In [104]:
    
plt.plot(emu.scale_bin_centers, np.mean(np.abs(resmat_flat)/(10**datamat_flat), axis = 1) )
plt.plot(emu.scale_bin_centers, np.mean(np.abs(test_err_diag), axis = 1) )
plt.xscale('log')
    
    
In [105]:
    
resmat_log_flat = resmat_log.reshape((-1, 18)).T
    
In [106]:
    
np.mean(np.array([ye/np.abs(y+1e-9) for ye, y in zip(emu.yerr, emu.y)]) , axis = 1)
    
    Out[106]:
In [107]:
    
plt.plot(emu.scale_bin_centers, np.mean(np.abs(resmat_log_flat)/np.abs(datamat_flat), axis = 1), label = 'Pred acc' )
#plt.plot(emu.scale_bin_centers, np.mean(np.array([ye/np.abs(y+1e-9) for ye, y in zip(emu.yerr, emu.y)]), axis = 1), label = 'Training Err' )
plt.yscale('log')
plt.legend(loc ='best')
plt.xscale('log')
    
    
In [108]:
    
for res, dat in zip(resmat_flat, 10**datamat_flat):
    plt.hist(res/dat, bins = 30)#, bins = np.linspace(-1, 1, 30))
    plt.yscale('log')
    plt.show()
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
In [109]:
    
scovmat = resmat_flat.dot(resmat_flat.T)/(len(pred_y[0])-1)
    
In [110]:
    
im = plt.imshow(np.log10(scovmat) )
plt.colorbar(im)
plt.show()
    
    
In [111]:
    
print np.sqrt(np.diag(scovmat))
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]: